Notebook Overiew

This notebook is a part of a series that documents the review analysis for the DRN Cell Types project.

This notebook contains the code used to co-cluster the DRN 5-HT neurons in the Zeisel et al. (2018) dataset with our DRN 5-HT neuronal subset. We will exclude the Okaty et al. (2015) cells since there are only 8 cells in that dataset.

Analysis Workflow

1. Initialize

1.1 Load libraries

library(devtools)
library(useful)
Loading required package: ggplot2
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(ggplot2)
library(reticulate)
library(Seurat)
Loading required package: cowplot

Attaching package: ‘cowplot’

The following object is masked from ‘package:ggplot2’:

    ggsave

Loading required package: Matrix
library(stringr)
library(Matrix)
library(parallel)
library(ape)

1.2 Load datasets

wd <- "/Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/"
dRaphe.neurons <- readRDS(file.path(wd, "DRN_inDrop_neurons.rds"))
dRaphe.neurons.5HT <- SubsetData(object = dRaphe.neurons,
                            ident.use = c("5-HT-I",
                                          "5-HT-II",
                                          "5-HT-III",
                                          "5-HT-IV",
                                          "5-HT-V"),
                            subset.raw = TRUE)
dim(dRaphe.neurons.5HT@data)
[1] 15941   704
zeisel.ob.SER <- readRDS(file.path(wd, "zeisel_Seurat_HBSER_clustered_filtered.rds"))
dim(zeisel.ob.SER@data)
[1] 13825   374

1.3 Subset the R1DR cells from the Zeisel dataset

We will get the cell names from the clustered Zeisel object, but take the count data from the loom file and use the same genes from the inDrop dataset.

Get the cell names:

zeisel.R1DR.cellnames <- WhichCells(object = zeisel.ob.SER,
                                    ident = c("R1 (medial DRN)",
                                              "R1 (lateral DRN)"))

Get the R1DR cells from the count matrix:

library(loomR)
Loading required package: R6
Loading required package: hdf5r

Attaching package: ‘loomR’

The following object is masked from ‘package:dplyr’:

    combine

The following object is masked from ‘package:devtools’:

    create
lfile <- connect(filename = "/Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/l6_r3_cholinergic_monoaminergic_and_peptidergic_neurons.loom")
zeisel.expr <- lfile$matrix[,]
zeisel.expr <- t(zeisel.expr)
zeisel.cellIDs <- lfile$col.attrs$CellID[]
zeisel.genes <- lfile$row.attrs$Gene[]
zeisel.genes.fix <- str_replace(string = zeisel.genes,
                                pattern = "-",
                                replacement = ".")
zeisel.expr <- as.data.frame(x = zeisel.expr,
                             row.names = zeisel.genes.fix)
colnames(zeisel.expr) <- zeisel.cellIDs
lfile$close_all()
zeisel.expr.R1DR <- zeisel.expr[,zeisel.R1DR.cellnames]
dim(zeisel.expr.R1DR)
[1] 27998   118

2. CCA-based merging of scRNA-seq data

We will only use cells in each dataset that are from the DRN and exclude 5-HT neurons from other structures/rhombomeres for this analysis. This should allow us to see if subtypes that we have identified in our dataset are also present in these other datasets.

2.1 Use the same genes in both datasets

huang.genes <- rownames(dRaphe.neurons.5HT@data)
sum(is.element(huang.genes, zeisel.genes.fix))
[1] 13931
head(huang.genes[!is.element(huang.genes, zeisel.genes.fix)],100)
  [1] "X.343C11.2"      "X0610007P14Rik"  "X0610009B22Rik"  "X0610009E02Rik"  "X0610009L18Rik" 
  [6] "X0610009O20Rik"  "X0610010F05Rik"  "X0610012G03Rik"  "X0610030E20Rik"  "X0610031O16Rik" 
 [11] "X0610037L13Rik"  "X0610040B10Rik"  "X0610040F04Rik"  "X0610040J01Rik"  "X0610043K17Rik" 
 [16] "X1110002E22Rik"  "X1110002L01Rik"  "X1110003F10Rik"  "X1110004E09Rik"  "X1110004F10Rik" 
 [21] "X1110008F13Rik"  "X1110008L16Rik"  "X1110008P14Rik"  "X1110015O18Rik"  "X1110017D15Rik" 
 [26] "X1110019D14Rik"  "X1110025M09Rik"  "X1110032A03Rik"  "X1110037F02Rik"  "X1110038B12Rik" 
 [31] "X1110038F14Rik"  "X1110051M20Rik"  "X1110059E24Rik"  "X1110059G10Rik"  "X1110065P20Rik" 
 [36] "X1190002N15Rik"  "X1190007I07Rik"  "X1300002E11Rik"  "X1300017J02Rik"  "X1500002C15Rik" 
 [41] "X1500004A13Rik"  "X1500005C15Rik"  "X1500009C09Rik"  "X1500009L16Rik"  "X1500011B03Rik" 
 [46] "X1500011K16Rik"  "X1500015A07Rik"  "X1500015O10Rik"  "X1500017E21Rik"  "X1500026H17Rik" 
 [51] "X1600002H07Rik"  "X1600002K03Rik"  "X1600012H06Rik"  "X1600014C10Rik"  "X1600020E01Rik" 
 [56] "X1600023N17Rik"  "X1600029O15Rik"  "X1700001L19Rik"  "X1700001O22Rik"  "X1700003E16Rik" 
 [61] "X1700003M07Rik"  "X1700006J14Rik"  "X1700007K13Rik"  "X1700008J07Rik"  "X1700010I14Rik" 
 [66] "X1700012B09Rik"  "X1700012D14Rik"  "X1700016K19Rik"  "X1700017B05Rik"  "X1700017D01Rik" 
 [71] "X1700019B21Rik"  "X1700019D03Rik"  "X1700020I14Rik"  "X1700021A07Rik"  "X1700021F05Rik" 
 [76] "X1700025G04Rik"  "X1700028E10Rik"  "X1700028J19Rik"  "X1700028K03Rik"  "X1700029J07Rik" 
 [81] "X1700030C10Rik"  "X1700030K09Rik"  "X1700031P21Rik"  "X1700037C18Rik"  "X1700037H04Rik" 
 [86] "X1700040L02Rik"  "X1700041G16Rik"  "X1700042O10Rik"  "X1700047I17Rik2" "X1700047M11Rik" 
 [91] "X1700052K11Rik"  "X1700055D18Rik"  "X1700056N10Rik"  "X1700063D05Rik"  "X1700066B19Rik" 
 [96] "X1700066M21Rik"  "X1700073E17Rik"  "X1700086L19Rik"  "X1700086O06Rik"  "X1700086P04Rik" 
tail(huang.genes[!is.element(huang.genes, zeisel.genes.fix)],100)
  [1] "Gm42568"       "Gm42633"       "Gm42648"       "Gm42668"       "Gm42689"       "Gm42718"      
  [7] "Gm42770"       "Gm42819"       "Gm42853"       "Gm42860"       "Gm42969"       "Gm43103"      
 [13] "Gm43149"       "Gm43180"       "Gm43205"       "Gm43210"       "Gm43237"       "Gm43268"      
 [19] "Gm43279"       "Gm43336"       "Gm43457"       "Gm43490"       "Gm43547"       "Gm43677"      
 [25] "Gm43776"       "Gm43795"       "Gm43879"       "Gm43884"       "Gm43920"       "Gm44033"      
 [31] "Gm44041"       "Gm44044"       "Gm44229"       "Gm44438"       "Gm44439"       "Gm44440"      
 [37] "Gm44542"       "Gm44630"       "Gm44682"       "Gm44797"       "Gm44799"       "Gm44834"      
 [43] "Gm44848"       "Gm44860"       "Gm44878"       "Gm44913"       "Gm45019"       "Gm45069"      
 [49] "Gm45211"       "Gm45251"       "Gm45287"       "Gm45319"       "Gm45407"       "Gm45416"      
 [55] "Gm45605"       "Gm45652"       "Gm4604"        "Gm4651"        "Gm5297"        "Gm5586"       
 [61] "Gm5678"        "Gm5776"        "Gm6028"        "Gm6394"        "Gm6395"        "Gm6498"       
 [67] "Gm6905"        "Gm7123"        "Gm7571"        "Gm8463"        "Ifi213"        "Lipo3"        
 [73] "Mfsd13a"       "Mrln"          "N4bp2os"       "Nectin2"       "Platr9"        "Prmt9"        
 [79] "RP23.114G13.1" "RP23.137G18.2" "RP23.177D16.3" "RP23.177D16.5" "RP23.181A8.2"  "RP23.240G3.2" 
 [85] "RP23.257B9.1"  "RP23.269H21.1" "RP23.314A15.5" "RP23.451B4.1"  "RP23.451H9.2"  "RP23.69L13.2" 
 [91] "RP24.189I2.4"  "RP24.362N13.1" "RP24.369J8.1"  "RP24.418N13.2" "RP24.571A14.6" "Rpap3.ps2"    
 [97] "Rpl10.ps2"     "Rpl30.ps11"    "Txn.ps1"       "Zfp975"       

Use the intersection:

genes.use <- huang.genes[is.element(huang.genes, zeisel.genes.fix)]
length(genes.use)
[1] 13931

Subset genes for both datasets:

zeisel.expr.R1DR <- zeisel.expr.R1DR[genes.use,]
dim(zeisel.expr.R1DR)
[1] 13931   118
huang.expr <- as.matrix(dRaphe.neurons.5HT@raw.data)
huang.expr <- huang.expr[genes.use,]
dim(huang.expr)
[1] 13931   704

2.2 Generate Seurat objects for each dataset

2.2.1 Initialize objects

huang.metadata <- dRaphe.neurons.5HT@meta.data[,c("BatchID", "Sex", "subtypeIDs")]
colnames(huang.metadata) <- c("BatchID", "Sex", "orig.clusterNames")
huang.metadata$BatchID <- as.factor(huang.metadata$BatchID)
zeisel.metadata <- as.data.frame(matrix(data = NA, 
                                        nrow = length(zeisel.R1DR.cellnames),
                                        ncol = 2),
                                 row.names = zeisel.R1DR.cellnames)
zeisel.metadata <- cbind(zeisel.metadata,
                         zeisel.ob.SER@meta.data[zeisel.R1DR.cellnames, c("zeisel.clusterNames")])
colnames(zeisel.metadata) <- c("BatchID", "Sex", "orig.clusterNames")
zeisel.metadata$BatchID <- as.factor(zeisel.metadata$BatchID)
zeisel.metadata$Sex <- as.factor(zeisel.metadata$Sex)
huang.ob <- CreateSeuratObject(raw.data = huang.expr,
                               meta.data = huang.metadata,
                               project = "Huang_inDrops",
                               min.cells = 0,
                               min.genes = 0,
                               scale.factor = 10000)
zeisel.ob <- CreateSeuratObject(raw.data = zeisel.expr.R1DR,
                                meta.data = zeisel.metadata,
                                project = "Zeisel_10X",
                                min.cells = 0,
                                min.genes = 0,
                                scale.factor = 10000)
mito.genes <- grep("^mt.", rownames(huang.ob@data), value = TRUE)
percent.mito <- Matrix::colSums(huang.ob@data[mito.genes, ])/Matrix::colSums(huang.ob@data)
huang.ob <- AddMetaData(huang.ob, percent.mito, "percent.mito")
percent.mito <- Matrix::colSums(zeisel.ob@data[mito.genes, ])/Matrix::colSums(zeisel.ob@data)
zeisel.ob <- AddMetaData(zeisel.ob, percent.mito, "percent.mito")
rm(percent.mito)

2.2.2 Normalize and scale data

huang.ob <- NormalizeData(object = huang.ob,
                          normalization.method = "LogNormalize",
                          scale.factor = 10000,
                          display.progress = FALSE)
zeisel.ob <- NormalizeData(object = zeisel.ob,
                           normalization.method = "LogNormalize",
                           scale.factor = 10000,
                           display.progress = FALSE)
huang.ob <- ScaleData(object = huang.ob,
                      vars.to.regress = c("nUMI", "percent.mito", "BatchID"),
                      model.use = "linear",
                      do.scale = TRUE,
                      scale.max = 10,
                      do.center = TRUE,
                      do.par = TRUE,
                      num.cores = 4,
                      display.progress = FALSE)
zeisel.ob <- ScaleData(object = zeisel.ob,
                       vars.to.regress = c("nUMI", "percent.mito"),
                       model.use = "linear",
                       do.scale = TRUE,
                       scale.max = 10,
                       do.center = TRUE,
                       do.par = TRUE,
                       num.cores = 4,
                       display.progress = FALSE)

2.3 Run multi-dataset CCA and align datasets into a merged object

2.3.1 Find highly variable genes (HVGs) in each dataset

huang.ob <- FindVariableGenes(object = huang.ob,
                              mean.function = ExpMean,
                              dispersion.function = LogVMR,
                              x.low.cutoff = 0.075,
                              x.high.cutoff = 4,
                              y.cutoff = 0.5,
                              num.bin = 100)
Calculating gene means
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variance to mean ratios
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|

length(huang.ob@var.genes)
[1] 2366
zeisel.ob <- FindVariableGenes(object = zeisel.ob,
                              mean.function = ExpMean,
                              dispersion.function = LogVMR,
                              x.low.cutoff = 0.075,
                              x.high.cutoff = 4,
                              y.cutoff = 0.5,
                              num.bin = 100)
Calculating gene means
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variance to mean ratios
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|

length(zeisel.ob@var.genes)
[1] 1990

2.3.2 Find HVGs

hvgs <- union(huang.ob@var.genes, zeisel.ob@var.genes)
length(hvgs)
[1] 3920

2.3.2 Run CCA

huang.ob@meta.data$dataset <- "Huang"
zeisel.ob@meta.data$dataset <- "Zeisel"
combined.ob <- RunCCA(object = huang.ob,
                      object2 = zeisel.ob,
                      genes.use = hvgs,
                      num.cc = 30,
                      scale.data = TRUE)
Running CCA
Merging objects
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Scaling data matrix

  |                                                                                                    
  |                                                                                              |   0%
  |                                                                                                    
  |==============================================================================================| 100%

2.3.3 Visualize and select CVs

for (i in 1:10) {
  DimHeatmap(object = combined.ob,
             reduction.type = "cca",
             cells.use = 200,
             dim.use = (3*(i-1)+1):(3*i),
             do.balanced = TRUE)
}

MetageneBicorPlot(object = combined.ob, 
                  grouping.var = "dataset", 
                  dims.eval = 1:30,
                  display.progress = FALSE)
Rescaling group 1
Rescaling group 2

2.3.4 Subspace alignment

combined.ob <- AlignSubspace(object = combined.ob,
                             reduction.type = "cca",
                             grouping.var = "dataset",
                             dims.align = 1:7,
                             verbose = FALSE)
VlnPlot(object = combined.ob,
        features.plot = c("CC1", "ACC1", "CC2", "ACC2",
                          "CC3", "ACC3", "CC4", "ACC4"),
        nCol = 4,
        group.by = "dataset",
        point.size.use = 0)

3. Clustering & DR

3.1 UMAP on aligned CVs

combined.ob <- RunUMAP(object = combined.ob,
                       reduction.use = "cca.aligned",
                       dims.use = 1:7,
                       n_neighbors = 30L,
                       min_dist = 0.1,
                       metric = "correlation")

Re-label cells by dataset + cluster names:

huang.cellnames.I <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-I")
huang.cellnames.II <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-II")
huang.cellnames.III <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-III")
huang.cellnames.IV <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-IV")
huang.cellnames.V <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-V")
zeisel.cellnames.l <- WhichCells(object = zeisel.ob.SER,
                                ident = "R1 (lateral DRN)")
zeisel.cellnames.m <- WhichCells(object = zeisel.ob.SER,
                                ident = "R1 (medial DRN)")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.I,
                        ident.use = "Huang_5-HT-I")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.II,
                        ident.use = "Huang_5-HT-II")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.III,
                        ident.use = "Huang_5-HT-III")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.IV,
                        ident.use = "Huang_5-HT-IV")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.V,
                        ident.use = "Huang_5-HT-V")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = zeisel.cellnames.l,
                        ident.use = "Zeisel_R1DR-lateral")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = zeisel.cellnames.m,
                        ident.use = "Zeisel_R1DR-medial")
p1 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "dataset",
             do.return = TRUE)
p2 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             do.return = TRUE)
plot_grid(p1, p2)

FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("Slc6a4", "Tph2", "Fev", "En1",
                              "Prkcq", "Asb4", "Hcrtr1", "Trh",
                              "Pdyn", "Slc17a8", "Cbln2", "Met"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)

3.2 Try PCA

combined.ob <- RunPCA(object = combined.ob,
                      pc.genes = hvgs,
                      pcs.compute = 30,
                      weight.by.var = FALSE,
                      pcs.print = NA)
for (i in 1:10) {
  DimHeatmap(object = combined.ob,
             reduction.type = "pca",
             cells.use = 200,
             dim.use = (3*(i-1)+1):(3*i),
             do.balanced = TRUE)
}

PCElbowPlot(object = combined.ob,
            num.pc = 30)

combined.ob <- RunUMAP(object = combined.ob,
                       reduction.use = "pca",
                       dims.use = 1:11,
                       n_neighbors = 30L,
                       min_dist = 0.1,
                       metric = "correlation")
p3 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "dataset",
             do.return = TRUE)
p4 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             do.return = TRUE)
plot_grid(p3, p4)

p3b <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "BatchID",
             do.return = TRUE)
plot_grid(p3, p3b)

FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("Slc6a4", "Tph2", "Fev", "En1",
                              "Prkcq", "Asb4", "Hcrtr1", "Trh",
                              "Pdyn", "Slc17a8", "Cbln2", "Met"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)

FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("nUMI", "nGene", "percent.mito", "Snap25",
                              "PC1", "PC2", "PC3", "PC4",
                              "PC5", "PC6", "PC7", "PC8"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)

Try running UMAP but now excluding PCs 1:3 – likely related to batch effects separating datasets and batches.

combined.ob <- RunUMAP(object = combined.ob,
                       reduction.use = "pca",
                       dims.use = 4:10,
                       n_neighbors = 30L,
                       min_dist = 0.1,
                       metric = "correlation")
p5 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "dataset",
             do.return = TRUE)
p6 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             do.return = TRUE)
plot_grid(p5, p6)

plot_grid(p4, p6)

plot_grid(p2, p6)

FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("Slc6a4", "Tph2", "Fev", "En1",
                              "Prkcq", "Asb4", "Hcrtr1", "Trh",
                              "Pdyn", "Slc17a8", "Cbln2", "Met"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)

Zeisel R1DR neurons are appearing in the different subtype clusters from our dataset, rather than separating out.

saveRDS(object = combined.ob,
        file = file.path(wd, "Combined_Huang_5-HT_Zeisel_R1DR.rds"))

Session Information

Machine specifications:

  • Mac Pro (Late 2013)
  • macOS High Sierra 10.13.4
  • 3.7 GHz Quad-Core Intel Xeon E5
  • 64 GB 1866 MHz DDR3
  • Java version “1.8.0_172”
devtools::session_info()
Session info ------------------------------------------------------------------------------------------
 setting  value                       
 version  R version 3.4.4 (2018-03-15)
 system   x86_64, darwin15.6.0        
 ui       RStudio (1.1.447)           
 language (EN)                        
 collate  en_US.UTF-8                 
 tz       America/New_York            
 date     2019-05-23                  
Packages ----------------------------------------------------------------------------------------------
 package       * version    date       source                            
 abind           1.4-5      2016-07-21 CRAN (R 3.4.0)                    
 acepack         1.4.1      2016-10-29 cran (@1.4.1)                     
 ape           * 5.1        2018-04-04 CRAN (R 3.4.4)                    
 assertthat      0.2.0      2017-04-11 CRAN (R 3.4.0)                    
 backports       1.1.2      2017-12-13 CRAN (R 3.4.3)                    
 base          * 3.4.4      2018-03-15 local                             
 base64enc       0.1-3      2015-07-28 cran (@0.1-3)                     
 bindr           0.1.1      2018-03-13 CRAN (R 3.4.4)                    
 bindrcpp      * 0.2.2      2018-03-29 CRAN (R 3.4.4)                    
 bit             1.1-14     2018-05-29 CRAN (R 3.4.4)                    
 bit64           0.9-7      2017-05-08 CRAN (R 3.4.0)                    
 bitops          1.0-6      2013-08-17 cran (@1.0-6)                     
 broom           0.4.4      2018-03-29 CRAN (R 3.4.4)                    
 caret           6.0-80     2018-05-26 CRAN (R 3.4.4)                    
 caTools         1.17.1     2014-09-10 cran (@1.17.1)                    
 checkmate       1.8.5      2017-10-24 CRAN (R 3.4.2)                    
 class           7.3-14     2015-08-30 CRAN (R 3.4.4)                    
 cluster         2.0.7-1    2018-04-09 CRAN (R 3.4.4)                    
 codetools       0.2-15     2016-10-05 CRAN (R 3.4.4)                    
 colorspace      1.3-2      2016-12-14 CRAN (R 3.4.0)                    
 compiler        3.4.4      2018-03-15 local                             
 cowplot       * 0.9.2      2017-12-17 CRAN (R 3.4.3)                    
 CVST            0.2-2      2018-05-26 CRAN (R 3.4.4)                    
 data.table      1.10.4-3   2017-10-27 CRAN (R 3.4.2)                    
 datasets      * 3.4.4      2018-03-15 local                             
 ddalpha         1.3.2      2018-04-08 CRAN (R 3.4.4)                    
 DEoptimR        1.0-8      2016-11-19 cran (@1.0-8)                     
 devtools      * 1.13.5     2018-02-18 CRAN (R 3.4.3)                    
 diffusionMap    1.1-0      2014-02-20 cran (@1.1-0)                     
 digest          0.6.15     2018-01-28 CRAN (R 3.4.3)                    
 dimRed          0.1.0      2017-05-04 CRAN (R 3.4.0)                    
 diptest         0.75-7     2016-12-05 cran (@0.75-7)                    
 doSNOW          1.0.16     2017-12-13 CRAN (R 3.4.3)                    
 dplyr         * 0.7.5      2018-05-19 CRAN (R 3.4.4)                    
 DRR             0.0.3      2018-01-06 CRAN (R 3.4.3)                    
 dtw             1.20-1     2018-05-18 CRAN (R 3.4.4)                    
 evaluate        0.10.1     2017-06-24 cran (@0.10.1)                    
 fitdistrplus    1.0-9      2017-03-24 CRAN (R 3.4.0)                    
 flexmix         2.3-14     2017-04-28 cran (@2.3-14)                    
 FNN             1.1        2013-07-31 cran (@1.1)                       
 foreach         1.4.4      2017-12-12 CRAN (R 3.4.3)                    
 foreign         0.8-70     2018-04-23 CRAN (R 3.4.4)                    
 Formula         1.2-3      2018-05-03 CRAN (R 3.4.4)                    
 fpc             2.1-11     2018-01-13 CRAN (R 3.4.3)                    
 gdata           2.18.0     2017-06-06 cran (@2.18.0)                    
 geometry        0.3-6      2015-09-09 CRAN (R 3.4.0)                    
 ggplot2       * 2.2.1      2016-12-30 CRAN (R 3.4.0)                    
 ggridges        0.5.0      2018-04-05 CRAN (R 3.4.4)                    
 glue            1.3.1      2019-03-12 cran (@1.3.1)                     
 gower           0.1.2      2017-02-23 CRAN (R 3.4.0)                    
 gplots          3.0.1      2016-03-30 cran (@3.0.1)                     
 graphics      * 3.4.4      2018-03-15 local                             
 grDevices     * 3.4.4      2018-03-15 local                             
 grid            3.4.4      2018-03-15 local                             
 gridExtra       2.3        2017-09-09 CRAN (R 3.4.1)                    
 gtable          0.2.0      2016-02-26 CRAN (R 3.4.0)                    
 gtools          3.5.0      2015-05-29 cran (@3.5.0)                     
 hdf5r         * 1.2.0      2019-04-20 CRAN (R 3.4.4)                    
 Hmisc           4.1-1      2018-01-03 CRAN (R 3.4.3)                    
 htmlTable       1.12       2018-05-26 CRAN (R 3.4.4)                    
 htmltools       0.3.6      2017-04-28 cran (@0.3.6)                     
 htmlwidgets     1.2        2018-04-19 CRAN (R 3.4.4)                    
 ica             1.0-2      2018-05-24 CRAN (R 3.4.4)                    
 igraph          1.2.1      2018-03-10 CRAN (R 3.4.4)                    
 ipred           0.9-6      2017-03-01 CRAN (R 3.4.0)                    
 irlba           2.3.2      2018-01-11 CRAN (R 3.4.3)                    
 iterators       1.0.9      2017-12-12 CRAN (R 3.4.3)                    
 jsonlite        1.5        2017-06-01 CRAN (R 3.4.0)                    
 kernlab         0.9-26     2018-04-30 CRAN (R 3.4.4)                    
 KernSmooth      2.23-15    2015-06-29 CRAN (R 3.4.4)                    
 knitr           1.20       2018-02-20 CRAN (R 3.4.3)                    
 labeling        0.3        2014-08-23 CRAN (R 3.4.0)                    
 lars            1.2        2013-04-24 cran (@1.2)                       
 lattice         0.20-35    2017-03-25 CRAN (R 3.4.4)                    
 latticeExtra    0.6-28     2016-02-09 cran (@0.6-28)                    
 lava            1.6.1      2018-03-28 CRAN (R 3.4.4)                    
 lazyeval        0.2.1      2017-10-29 CRAN (R 3.4.2)                    
 lmtest          0.9-36     2018-04-04 CRAN (R 3.4.4)                    
 loomR         * 0.2.1.9000 2019-04-26 Github (mojaveazure/loomR@87a6866)
 lubridate       1.7.4      2018-04-11 CRAN (R 3.4.4)                    
 magic           1.5-8      2018-01-26 CRAN (R 3.4.3)                    
 magrittr        1.5        2014-11-22 CRAN (R 3.4.0)                    
 MASS            7.3-50     2018-04-30 CRAN (R 3.4.4)                    
 Matrix        * 1.2-14     2018-04-09 CRAN (R 3.4.4)                    
 mclust          5.4        2017-11-22 CRAN (R 3.4.3)                    
 memoise         1.1.0      2017-04-21 CRAN (R 3.4.0)                    
 metap           0.9        2018-04-25 CRAN (R 3.4.4)                    
 methods       * 3.4.4      2018-03-15 local                             
 mixtools        1.1.0      2017-03-10 cran (@1.1.0)                     
 mnormt          1.5-5      2016-10-15 cran (@1.5-5)                     
 ModelMetrics    1.1.0      2016-08-26 cran (@1.1.0)                     
 modeltools      0.2-21     2013-09-02 cran (@0.2-21)                    
 munsell         0.4.3      2016-02-13 CRAN (R 3.4.0)                    
 mvtnorm         1.0-7      2018-01-25 CRAN (R 3.4.3)                    
 nlme            3.1-137    2018-04-07 CRAN (R 3.4.4)                    
 nnet            7.3-12     2016-02-02 CRAN (R 3.4.4)                    
 parallel      * 3.4.4      2018-03-15 local                             
 pbapply         1.4-0      2019-02-05 cran (@1.4-0)                     
 pillar          1.2.3      2018-05-25 CRAN (R 3.4.4)                    
 pkgconfig       2.0.1      2017-03-21 CRAN (R 3.4.0)                    
 plyr            1.8.4      2016-06-08 CRAN (R 3.4.0)                    
 png             0.1-7      2013-12-03 CRAN (R 3.4.0)                    
 prabclus        2.2-6      2015-01-14 cran (@2.2-6)                     
 prodlim         2018.04.18 2018-04-18 CRAN (R 3.4.4)                    
 proxy           0.4-22     2018-04-08 CRAN (R 3.4.4)                    
 psych           1.8.4      2018-05-06 CRAN (R 3.4.4)                    
 purrr           0.2.5      2018-05-29 CRAN (R 3.4.4)                    
 R.methodsS3     1.7.1      2016-02-16 cran (@1.7.1)                     
 R.oo            1.22.0     2018-04-22 CRAN (R 3.4.4)                    
 R.utils         2.6.0      2017-11-05 CRAN (R 3.4.2)                    
 R6            * 2.4.0      2019-02-14 CRAN (R 3.4.4)                    
 ranger          0.10.0     2018-05-29 CRAN (R 3.4.4)                    
 RANN            2.5.1      2017-05-21 CRAN (R 3.4.0)                    
 RColorBrewer    1.1-2      2014-12-07 CRAN (R 3.4.0)                    
 Rcpp            0.12.17    2018-05-18 CRAN (R 3.4.4)                    
 RcppRoll        0.2.2      2015-04-05 CRAN (R 3.4.0)                    
 recipes         0.1.2      2018-01-11 CRAN (R 3.4.3)                    
 reshape2        1.4.3      2017-12-11 CRAN (R 3.4.3)                    
 reticulate    * 1.11.1     2019-03-06 CRAN (R 3.4.4)                    
 rlang           0.2.0      2018-02-20 CRAN (R 3.4.3)                    
 rmarkdown       1.9        2018-03-01 CRAN (R 3.4.3)                    
 robustbase      0.92-8     2017-11-01 CRAN (R 3.4.2)                    
 ROCR            1.0-7      2015-03-26 cran (@1.0-7)                     
 rpart           4.1-13     2018-02-23 CRAN (R 3.4.4)                    
 rprojroot       1.3-2      2018-01-03 CRAN (R 3.4.3)                    
 rstudioapi      0.7        2017-09-07 CRAN (R 3.4.1)                    
 Rtsne           0.13       2017-04-14 cran (@0.13)                      
 scales          0.5.0      2017-08-24 CRAN (R 3.4.1)                    
 scatterplot3d   0.3-41     2018-03-14 CRAN (R 3.4.4)                    
 SDMTools        1.1-221    2014-08-05 cran (@1.1-221)                   
 segmented       0.5-3.0    2017-11-30 CRAN (R 3.4.3)                    
 Seurat        * 2.3.1      2018-05-05 CRAN (R 3.4.4)                    
 sfsmisc         1.1-2      2018-03-05 CRAN (R 3.4.4)                    
 snow            0.4-2      2016-10-14 CRAN (R 3.4.0)                    
 splines         3.4.4      2018-03-15 local                             
 stats         * 3.4.4      2018-03-15 local                             
 stats4          3.4.4      2018-03-15 local                             
 stringi         1.4.3      2019-03-12 cran (@1.4.3)                     
 stringr       * 1.4.0      2019-02-10 cran (@1.4.0)                     
 survival        2.42-3     2018-04-16 CRAN (R 3.4.4)                    
 tclust          1.4-1      2018-05-24 CRAN (R 3.4.4)                    
 tibble          1.4.2      2018-01-22 CRAN (R 3.4.3)                    
 tidyr           0.8.1      2018-05-18 CRAN (R 3.4.4)                    
 tidyselect      0.2.4      2018-02-26 CRAN (R 3.4.3)                    
 timeDate        3043.102   2018-02-21 CRAN (R 3.4.3)                    
 tools           3.4.4      2018-03-15 local                             
 trimcluster     0.1-2      2012-10-29 cran (@0.1-2)                     
 tsne            0.1-3      2016-07-15 cran (@0.1-3)                     
 useful        * 1.2.4      2018-05-21 CRAN (R 3.4.4)                    
 utils         * 3.4.4      2018-03-15 local                             
 VGAM            1.0-5      2018-02-07 CRAN (R 3.4.3)                    
 withr           2.1.2      2018-03-15 CRAN (R 3.4.4)                    
 yaml            2.1.19     2018-05-01 CRAN (R 3.4.4)                    
 zoo             1.8-1      2018-01-08 CRAN (R 3.4.3)                    
---
title: "DRN Cell Types Project: Co-Clustering"
output: 
  html_notebook:
    toc: yes
    toc_depth: 3
    toc_float: yes
author: "Kee Wui Huang, Sabatini Lab (Harvard Medical School)"
date: "`r format(Sys.time(), '%d %B, %Y')`"
---

# Notebook Overiew
This notebook is a part of a series that documents the review analysis for the DRN Cell Types project.

This notebook contains the code used to co-cluster the DRN 5-HT neurons in the Zeisel et al. (2018) dataset with our DRN 5-HT neuronal subset. We will exclude the Okaty et al. (2015) cells since there are only 8 cells in that dataset.


# Analysis Workflow

## 1. Initialize

### 1.1 Load libraries
```{r}
library(devtools)
library(useful)
library(dplyr)
library(ggplot2)
library(reticulate)
library(Seurat)
library(stringr)
library(Matrix)
library(parallel)
library(ape)
```

### 1.2 Load datasets
```{r}
wd <- "/Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/"
dRaphe.neurons <- readRDS(file.path(wd, "DRN_inDrop_neurons.rds"))
dRaphe.neurons.5HT <- SubsetData(object = dRaphe.neurons,
                            ident.use = c("5-HT-I",
                                          "5-HT-II",
                                          "5-HT-III",
                                          "5-HT-IV",
                                          "5-HT-V"),
                            subset.raw = TRUE)
dim(dRaphe.neurons.5HT@data)

zeisel.ob.SER <- readRDS(file.path(wd, "zeisel_Seurat_HBSER_clustered_filtered.rds"))
dim(zeisel.ob.SER@data)
```

### 1.3 Subset the R1DR cells from the Zeisel dataset

We will get the cell names from the clustered Zeisel object, but take the count data from the loom file and use the same genes from the inDrop dataset.

Get the cell names:
```{r}
zeisel.R1DR.cellnames <- WhichCells(object = zeisel.ob.SER,
                                    ident = c("R1 (medial DRN)",
                                              "R1 (lateral DRN)"))
```

Get the R1DR cells from the count matrix:
```{r}
library(loomR)
lfile <- connect(filename = "/Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/l6_r3_cholinergic_monoaminergic_and_peptidergic_neurons.loom")
zeisel.expr <- lfile$matrix[,]
zeisel.expr <- t(zeisel.expr)
zeisel.cellIDs <- lfile$col.attrs$CellID[]
zeisel.genes <- lfile$row.attrs$Gene[]
zeisel.genes.fix <- str_replace(string = zeisel.genes,
                                pattern = "-",
                                replacement = ".")
zeisel.expr <- as.data.frame(x = zeisel.expr,
                             row.names = zeisel.genes.fix)
colnames(zeisel.expr) <- zeisel.cellIDs
lfile$close_all()
```

```{r}
zeisel.expr.R1DR <- zeisel.expr[,zeisel.R1DR.cellnames]
dim(zeisel.expr.R1DR)
```


## 2. CCA-based merging of scRNA-seq data

We will only use cells in each dataset that are from the DRN and exclude 5-HT neurons from other structures/rhombomeres for this analysis. This should allow us to see if subtypes that we have identified in our dataset are also present in these other datasets.


### 2.1 Use the same genes in both datasets
```{r}
huang.genes <- rownames(dRaphe.neurons.5HT@data)
sum(is.element(huang.genes, zeisel.genes.fix))
```

```{r}
head(huang.genes[!is.element(huang.genes, zeisel.genes.fix)],100)
tail(huang.genes[!is.element(huang.genes, zeisel.genes.fix)],100)
```

Use the intersection:
```{r}
genes.use <- huang.genes[is.element(huang.genes, zeisel.genes.fix)]
length(genes.use)
```

Subset genes for both datasets:
```{r}
zeisel.expr.R1DR <- zeisel.expr.R1DR[genes.use,]
dim(zeisel.expr.R1DR)
```

```{r}
huang.expr <- as.matrix(dRaphe.neurons.5HT@raw.data)
huang.expr <- huang.expr[genes.use,]
dim(huang.expr)
```


### 2.2 Generate Seurat objects for each dataset

#### 2.2.1 Initialize objects
```{r}
huang.metadata <- dRaphe.neurons.5HT@meta.data[,c("BatchID", "Sex", "subtypeIDs")]
colnames(huang.metadata) <- c("BatchID", "Sex", "orig.clusterNames")
huang.metadata$BatchID <- as.factor(huang.metadata$BatchID)

zeisel.metadata <- as.data.frame(matrix(data = NA, 
                                        nrow = length(zeisel.R1DR.cellnames),
                                        ncol = 2),
                                 row.names = zeisel.R1DR.cellnames)
zeisel.metadata <- cbind(zeisel.metadata,
                         zeisel.ob.SER@meta.data[zeisel.R1DR.cellnames, c("zeisel.clusterNames")])
colnames(zeisel.metadata) <- c("BatchID", "Sex", "orig.clusterNames")
zeisel.metadata$BatchID <- as.factor(zeisel.metadata$BatchID)
zeisel.metadata$Sex <- as.factor(zeisel.metadata$Sex)
```

```{r}
huang.ob <- CreateSeuratObject(raw.data = huang.expr,
                               meta.data = huang.metadata,
                               project = "Huang_inDrops",
                               min.cells = 0,
                               min.genes = 0,
                               scale.factor = 10000)
zeisel.ob <- CreateSeuratObject(raw.data = zeisel.expr.R1DR,
                                meta.data = zeisel.metadata,
                                project = "Zeisel_10X",
                                min.cells = 0,
                                min.genes = 0,
                                scale.factor = 10000)
```

```{r}
mito.genes <- grep("^mt.", rownames(huang.ob@data), value = TRUE)

percent.mito <- Matrix::colSums(huang.ob@data[mito.genes, ])/Matrix::colSums(huang.ob@data)
huang.ob <- AddMetaData(huang.ob, percent.mito, "percent.mito")

percent.mito <- Matrix::colSums(zeisel.ob@data[mito.genes, ])/Matrix::colSums(zeisel.ob@data)
zeisel.ob <- AddMetaData(zeisel.ob, percent.mito, "percent.mito")

rm(percent.mito)
```

#### 2.2.2 Normalize and scale data
```{r}
huang.ob <- NormalizeData(object = huang.ob,
                          normalization.method = "LogNormalize",
                          scale.factor = 10000,
                          display.progress = FALSE)
zeisel.ob <- NormalizeData(object = zeisel.ob,
                           normalization.method = "LogNormalize",
                           scale.factor = 10000,
                           display.progress = FALSE)

huang.ob <- ScaleData(object = huang.ob,
                      vars.to.regress = c("nUMI", "percent.mito", "BatchID"),
                      model.use = "linear",
                      do.scale = TRUE,
                      scale.max = 10,
                      do.center = TRUE,
                      do.par = TRUE,
                      num.cores = 4,
                      display.progress = FALSE)
zeisel.ob <- ScaleData(object = zeisel.ob,
                       vars.to.regress = c("nUMI", "percent.mito"),
                       model.use = "linear",
                       do.scale = TRUE,
                       scale.max = 10,
                       do.center = TRUE,
                       do.par = TRUE,
                       num.cores = 4,
                       display.progress = FALSE)
```


### 2.3 Run multi-dataset CCA and align datasets into a merged object

#### 2.3.1 Find highly variable genes (HVGs) in each dataset
```{r, fig.width=10}
huang.ob <- FindVariableGenes(object = huang.ob,
                              mean.function = ExpMean,
                              dispersion.function = LogVMR,
                              x.low.cutoff = 0.075,
                              x.high.cutoff = 4,
                              y.cutoff = 0.5,
                              num.bin = 100)
length(huang.ob@var.genes)

zeisel.ob <- FindVariableGenes(object = zeisel.ob,
                              mean.function = ExpMean,
                              dispersion.function = LogVMR,
                              x.low.cutoff = 0.075,
                              x.high.cutoff = 4,
                              y.cutoff = 0.5,
                              num.bin = 100)
length(zeisel.ob@var.genes)
```

#### 2.3.2 Find HVGs
```{r}
hvgs <- union(huang.ob@var.genes, zeisel.ob@var.genes)
length(hvgs)
```


#### 2.3.2 Run CCA
```{r}
huang.ob@meta.data$dataset <- "Huang"
zeisel.ob@meta.data$dataset <- "Zeisel"
combined.ob <- RunCCA(object = huang.ob,
                      object2 = zeisel.ob,
                      genes.use = hvgs,
                      num.cc = 30,
                      scale.data = TRUE)
```

#### 2.3.3 Visualize and select CVs
```{r, fig.width=10}
for (i in 1:10) {
  DimHeatmap(object = combined.ob,
             reduction.type = "cca",
             cells.use = 200,
             dim.use = (3*(i-1)+1):(3*i),
             do.balanced = TRUE)
}
```

```{r, fig.width=16}
MetageneBicorPlot(object = combined.ob, 
                  grouping.var = "dataset", 
                  dims.eval = 1:30,
                  display.progress = FALSE)
```

#### 2.3.4 Subspace alignment
```{r}
combined.ob <- AlignSubspace(object = combined.ob,
                             reduction.type = "cca",
                             grouping.var = "dataset",
                             dims.align = 1:7,
                             verbose = FALSE)
```

```{r, fig.width=16}
VlnPlot(object = combined.ob,
        features.plot = c("CC1", "ACC1", "CC2", "ACC2",
                          "CC3", "ACC3", "CC4", "ACC4"),
        nCol = 4,
        group.by = "dataset",
        point.size.use = 0)
```


### 3. Clustering & DR

#### 3.1 UMAP on aligned CVs
```{r}
combined.ob <- RunUMAP(object = combined.ob,
                       reduction.use = "cca.aligned",
                       dims.use = 1:7,
                       n_neighbors = 30L,
                       min_dist = 0.1,
                       metric = "correlation")
```

Re-label cells by dataset + cluster names:
```{r}
huang.cellnames.I <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-I")
huang.cellnames.II <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-II")
huang.cellnames.III <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-III")
huang.cellnames.IV <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-IV")
huang.cellnames.V <- WhichCells(object = dRaphe.neurons.5HT,
                                ident = "5-HT-V")

zeisel.cellnames.l <- WhichCells(object = zeisel.ob.SER,
                                ident = "R1 (lateral DRN)")
zeisel.cellnames.m <- WhichCells(object = zeisel.ob.SER,
                                ident = "R1 (medial DRN)")

combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.I,
                        ident.use = "Huang_5-HT-I")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.II,
                        ident.use = "Huang_5-HT-II")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.III,
                        ident.use = "Huang_5-HT-III")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.IV,
                        ident.use = "Huang_5-HT-IV")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = huang.cellnames.V,
                        ident.use = "Huang_5-HT-V")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = zeisel.cellnames.l,
                        ident.use = "Zeisel_R1DR-lateral")
combined.ob <- SetIdent(object = combined.ob,
                        cells.use = zeisel.cellnames.m,
                        ident.use = "Zeisel_R1DR-medial")
```

```{r, fig.width=16}
p1 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "dataset",
             do.return = TRUE)
p2 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             do.return = TRUE)
plot_grid(p1, p2)
```

```{r, fig.width=16}
FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("Slc6a4", "Tph2", "Fev", "En1",
                              "Prkcq", "Asb4", "Hcrtr1", "Trh",
                              "Pdyn", "Slc17a8", "Cbln2", "Met"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)
```


#### 3.2 Try PCA
```{r}
combined.ob <- RunPCA(object = combined.ob,
                      pc.genes = hvgs,
                      pcs.compute = 30,
                      weight.by.var = FALSE,
                      pcs.print = NA)
```

```{r, fig.width=10}
for (i in 1:10) {
  DimHeatmap(object = combined.ob,
             reduction.type = "pca",
             cells.use = 200,
             dim.use = (3*(i-1)+1):(3*i),
             do.balanced = TRUE)
}
```

```{r, fig.width=16}
PCElbowPlot(object = combined.ob,
            num.pc = 30)
```

```{r}
combined.ob <- RunUMAP(object = combined.ob,
                       reduction.use = "pca",
                       dims.use = 1:11,
                       n_neighbors = 30L,
                       min_dist = 0.1,
                       metric = "correlation")
```

```{r, fig.width=16}
p3 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "dataset",
             do.return = TRUE)
p4 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             do.return = TRUE)
plot_grid(p3, p4)
```

```{r, fig.width=16}
p3b <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "BatchID",
             do.return = TRUE)
plot_grid(p3, p3b)
```

```{r, fig.width=16}
FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("Slc6a4", "Tph2", "Fev", "En1",
                              "Prkcq", "Asb4", "Hcrtr1", "Trh",
                              "Pdyn", "Slc17a8", "Cbln2", "Met"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)
```

```{r, fig.width=16}
FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("nUMI", "nGene", "percent.mito", "Snap25",
                              "PC1", "PC2", "PC3", "PC4",
                              "PC5", "PC6", "PC7", "PC8"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)
```

Try running UMAP but now excluding PCs 1:3 -- likely related to batch effects separating datasets and batches.
```{r}
combined.ob <- RunUMAP(object = combined.ob,
                       reduction.use = "pca",
                       dims.use = 4:10,
                       n_neighbors = 30L,
                       min_dist = 0.1,
                       metric = "correlation")
```

```{r, fig.width=16}
p5 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             group.by = "dataset",
             do.return = TRUE)
p6 <- DimPlot(object = combined.ob,
             reduction.use = "umap",
             do.label = FALSE,
             no.legend = FALSE,
             pt.size = 3,
             do.return = TRUE)
plot_grid(p5, p6)
```

```{r, fig.width=16}
plot_grid(p4, p6)
```

```{r, fig.width=16}
plot_grid(p2, p6)
```

```{r, fig.width=16}
FeaturePlot(object = combined.ob,
            nCol = 4,
            reduction.use = "umap",
            features.plot = c("Slc6a4", "Tph2", "Fev", "En1",
                              "Prkcq", "Asb4", "Hcrtr1", "Trh",
                              "Pdyn", "Slc17a8", "Cbln2", "Met"),
            cols.use = c("gray", "red"),
            no.legend = FALSE,
            pt.size = 1)
```

Zeisel R1DR neurons are appearing in the different subtype clusters from our dataset, rather than separating out.

```{r}
saveRDS(object = combined.ob,
        file = file.path(wd, "Combined_Huang_5-HT_Zeisel_R1DR.rds"))
```


## Session Information

Machine specifications:  

* Mac Pro (Late 2013)  
* macOS High Sierra 10.13.4  
* 3.7 GHz Quad-Core Intel Xeon E5  
* 64 GB 1866 MHz DDR3  
* Java version "1.8.0_172"  

```{r}
devtools::session_info()
```